import pandas as pd
import plotly.express as px
import numpy as np
import itables
import country_converter as cc# Load the Human Development Index data
hdi = pd.read_csv("data/hdi_human_development_index.csv")

# Convert the dataframe from wide to long: every non-id column header becomes
# a value of "year", and "name" is exposed as "country"
hdi_long = hdi.melt(
    id_vars=["geo", "name"],
    var_name="year",
    value_name="human_development_index",
).rename(columns={"name": "country"})

# The year values come from column headers, so cast them to integers before
# filtering the data from 2000 onwards
hdi_long["year"] = hdi_long["year"].astype(int)
hdi_long = hdi_long.query("year >= 2000")

# Create a subset for 2023 with upper-cased "geo" codes.
# assign() builds a new dataframe, so the upper-casing is never written into a
# filtered slice of hdi_long (which raises pandas' SettingWithCopyWarning).
hdi_2023 = hdi_long[hdi_long["year"] == 2023].assign(
    geo=lambda frame: frame["geo"].str.upper()
)

# Ten countries with the highest HDI in 2023. nlargest already returns the
# rows ordered from highest to lowest, so no additional sort is required.
hdi_top_10_countries = hdi_2023.nlargest(10, "human_development_index")

# Full 2000-onwards history of those ten countries
hdi_top_10_data = hdi_long[hdi_long["country"].isin(hdi_top_10_countries["country"])]

# Load the GDP per capita data
gdp_pcap = pd.read_csv("data/gdp_pcap.csv")

# Reshape from wide to long: every non-id column header becomes a "year" value
gdp_pcap_long = pd.melt(
    gdp_pcap,
    id_vars=["geo", "name"],
    var_name="year",
    value_name="gdp_per_capita",
)
gdp_pcap_long = gdp_pcap_long.rename(columns={"name": "country"})

# Cast the year labels to integers.
# NOTE: unlike the other indicators, no "year >= 2000" filter is applied here;
# that filter is intentionally left disabled, so all years are kept.
gdp_pcap_long = gdp_pcap_long.astype({"year": int})
# Categorise the GDP per capita based on actual historical data and future projection
# The threshold is determined based on https://www.gapminder.org/data/documentation/gd001/
def categorise_year(y, last_actual_year=2022):
    """Label a year as observed data or as a projection.

    Args:
        y: Year to classify.
        last_actual_year: Last year that still counts as "Actual".
            Defaults to 2022, the threshold used throughout this script.

    Returns:
        "Actual" when ``y <= last_actual_year``, otherwise
        "Future Projection".
    """
    return "Actual" if y <= last_actual_year else "Future Projection"
# Wrap the scalar classifier so it can be applied element-wise, then store the
# label of every row's year in a new "year_category" column
categorise_year_vec = np.vectorize(categorise_year)
gdp_pcap_long = gdp_pcap_long.assign(
    year_category=categorise_year_vec(gdp_pcap_long["year"])
)

# Load life expectancy data
life_exp = pd.read_csv("data/lex.csv")

# Reshape from wide to long: every non-id column header becomes a "year" value
life_exp_long = pd.melt(
    life_exp,
    id_vars=["geo", "name"],
    var_name="year",
    value_name="life_expectancy",
)
life_exp_long = life_exp_long.rename(columns={"name": "country"})

# Cast the year labels to integers and keep only 2000 onwards
life_exp_long = life_exp_long.astype({"year": int})
life_exp_long = life_exp_long[life_exp_long["year"] >= 2000]

# Data visualization for the first indicator
# Choropleth of the 2023 HDI: one region per "geo" code, shaded by HDI value,
# with the country name shown on hover
hdi_map = px.choropleth(
    data_frame=hdi_2023,
    locations="geo",
    color="human_development_index",
    hover_name="country",
    title="Map of Countries by Human Development Index in 2023",
    color_continuous_scale="Blues",
)
# Hide the continuous colour bar
hdi_map = hdi_map.update_layout(coloraxis_showscale=False)
# HDI trend lines (one per country, with point markers) for the ten countries
# that had the highest HDI in 2023
hdi_10_line_chart = px.line(
    data_frame=hdi_top_10_data,
    x="year",
    y="human_development_index",
    color="country",
    markers=True,
    labels=dict(
        year="Year",
        human_development_index="Human Development Index",
    ),
    title="Changes in Human Development Index over time among the top ten countries in 2023",
)

# Footnote below the plot area, centred horizontally; paper coordinates are
# used so the position does not depend on the data range
hdi_10_line_chart.add_annotation(
    text="Top ten countries in 2023: The top ten countries that have the highest Human Development Index in 2023",
    xref="paper",
    yref="paper",
    x=0.5,
    y=-0.3,
    xanchor="center",
    showarrow=False,
    font={"size": 9},
)

# Data visualization for the second indicator
# Calculate the average GDP per capita across all rows of each year; grouping
# also by "year_category" keeps the actual/projection label in the result
gdp_pcap_avg = (
    gdp_pcap_long
    .groupby(["year", "year_category"])
    .agg(mean_gdp_pcap=pd.NamedAgg(column="gdp_per_capita", aggfunc="mean"))
    .reset_index()
)

# Bar chart of the yearly average, coloured by actual vs. projected years
gdp_pcap_avg_bar = px.bar(
    data_frame=gdp_pcap_avg,
    x="year",
    y="mean_gdp_pcap",
    color="year_category",
    labels=dict(year="Year", mean_gdp_pcap="Average GDP per Capita"),
    title="Global Average GDP per Capita over time",
)

# Untitled horizontal legend, centred underneath the plot
gdp_pcap_avg_bar.update_layout(
    legend_title_text="",
    legend={
        "orientation": "h",
        "x": 0.5,
        "y": -0.25,
        "xanchor": "center",
        "yanchor": "top",
    },
)
# Per-country mean and standard deviation of GDP per capita over 1990-2022
# (the years labelled "Actual"), reduced to the ten countries with the largest
# standard deviation, ordered from largest to smallest
gdp_pcap_std = (
    gdp_pcap_long[gdp_pcap_long["year"].between(1990, 2022)]
    .groupby("country")
    .agg(
        mean_gdp_pcap=pd.NamedAgg(column="gdp_per_capita", aggfunc="mean"),
        std_gdp_pcap=pd.NamedAgg(column="gdp_per_capita", aggfunc="std"),
    )
    .nlargest(10, "std_gdp_pcap")
    .sort_values(by="std_gdp_pcap", ascending=False)
    .reset_index()
)

# Bar chart of the mean, with the standard deviation drawn as error bars
gdp_pcap_std_bar = px.bar(
    data_frame=gdp_pcap_std,
    x="country",
    y="mean_gdp_pcap",
    error_y="std_gdp_pcap",
    labels=dict(country="Country", mean_gdp_pcap="Average GDP per Capita"),
    title="Average GDP per capita of the top ten countries with biggest changes based on available data from 1990 to 2022",
)
# Yearly GDP per capita (1990-2022) of the ten countries selected in
# gdp_pcap_std, drawn as one line per country
gdp_std_top_10_data = gdp_pcap_long[
    gdp_pcap_long["country"].isin(gdp_pcap_std["country"])
    & gdp_pcap_long["year"].between(1990, 2022)
]
gdp_std_top_10_line = px.line(
    data_frame=gdp_std_top_10_data,
    x="year",
    y="gdp_per_capita",
    color="country",
    labels=dict(year="Year", gdp_per_capita="GDP per Capita"),
    title="GDP per capita of the top ten countries with biggest changes based on available data from 1990 to 2022",
)
gdp_std_top_10_line.update_layout(legend_title_text="Country")

# Data visualization for relationship between two indicators
# Prepare data: merge HDI and GDP per capita data.
# The inner join keeps only country/geo/year combinations present in both
# tables.
hdi_gdp_combined = pd.merge(
    hdi_long,
    gdp_pcap_long,
    on=["country", "geo", "year"],
    how="inner",
)

# Animated scatter plot (one frame per year) of HDI against GDP per capita.
# The title previously repeated the GDP line chart's title; it now describes
# what this figure actually shows.
hdi_gdp_scatter = px.scatter(
    hdi_gdp_combined,
    x="gdp_per_capita",
    y="human_development_index",
    hover_name="country",
    animation_frame="year",
    labels={
        "human_development_index": "Human Development Index",
        "gdp_per_capita": "GDP per Capita",
        "year": "Year",
    },
    title="Relationship between GDP per capita and Human Development Index over time",
    template="simple_white",
)

# Presumably the display expression of a separate notebook/dashboard cell
# (it was fused onto the closing parenthesis above) - TODO confirm.
hdi_map
# Provide a description here.
# Presumably two separate display cells that were fused onto one line by the
# export (a syntax error as written): show the HDI line chart, then render the
# top-ten table with itables - TODO confirm against the original notebook.
hdi_10_line_chart
itables.show(hdi_top_10_countries)
# Rendered-output residue from the export, kept for reference:
# | Loading ITables v2.6.1 from the internet... (need help?) |